Author

Aiden Kelly

Data

Code
# Display the full Anames data set as a DT table.
# NOTE(review): rendering this emits a DT warning that the data is too big for
# client-side DataTables — consider showing a subset or server-side processing.
datatable(Anames)
Warning in instance$preRenderHook(instance): It seems your data is too big for
client-side DataTables. You may consider server-side processing:
https://rstudio.github.io/DT/server.html

Summing Num of Allisons

Code
# Build a wide table of "Allison" counts: one row per Sex/State pair and one
# column per year (1997 through 2014).
temp <- Anames |>
  filter(Name == 'Allison') |>
  select(Year:Count) |>
  rename(Sex = Gender) |>
  pivot_wider(names_from = Year,
              values_from = Count) |>
  # Sex/State/year combinations with no row in the data come out of the pivot
  # as NA; replace them with 0 so every year column is fully numeric.
  mutate(across(.cols = `1997`:`2014`,
                .fns = \(x) replace_na(x, 0)))

# Render the wide table as a styled HTML table.
temp |>
  kable(format = "html",
        caption = "==>Occurance of the name Allison by state and year<==") |>
  kable_classic(html_font = "FiraCode Nerd Font")
==>Occurance of the name Allison by state and year<==
Sex State 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014
F AK 19 10 13 14 9 14 14 13 10 18 13 15 13 12 8 11 16 10
F AL 121 148 124 91 82 83 88 68 86 68 78 83 82 71 66 67 66 63
F AR 78 64 67 72 82 73 61 65 71 79 55 64 73 60 61 50 67 56
F AZ 98 103 96 97 98 83 80 94 91 109 91 133 147 126 93 110 108 123
F CA 565 638 571 538 539 554 545 591 580 581 654 844 973 854 845 834 803 904
F CO 100 100 106 92 72 69 95 73 87 70 89 95 119 92 96 69 80 90
F CT 82 102 89 81 63 71 60 72 54 54 55 51 54 43 44 41 48 35
F DC 15 15 8 8 13 12 19 13 18 16 19 29 41 15 19 22 22 17
F DE 19 12 25 12 19 18 14 18 14 14 10 16 23 18 15 17 14 16
F FL 253 308 224 241 220 244 235 213 228 225 239 265 290 248 246 270 244 262
F GA 196 215 173 209 176 147 166 169 157 164 159 200 226 197 154 185 199 165
F HI 6 15 12 12 11 6 9 17 6 10 9 15 12 14 6 12 0 11
F IA 130 127 123 109 104 80 98 101 82 91 78 65 56 49 49 50 45 40
F ID 19 29 29 16 20 30 22 32 33 24 21 24 32 29 27 23 21 20
F IL 364 452 362 295 320 334 262 307 275 263 248 275 284 246 202 203 220 198
F IN 239 238 241 198 198 200 182 180 183 156 146 161 128 135 132 136 115 99
F KS 75 109 77 81 85 105 72 80 75 66 60 64 66 67 63 57 35 46
F KY 130 156 137 134 129 126 123 85 104 112 108 104 82 98 82 73 56 66
M KY 0 0 0 0 0 0 0 20 0 0 0 0 0 0 0 0 0 0
F LA 109 98 113 64 76 65 74 65 73 49 48 64 53 51 50 48 50 59
F MA 190 201 174 142 179 148 147 146 129 113 106 104 85 72 74 86 66 56
F MD 145 168 153 121 131 129 119 110 90 116 134 116 141 131 97 96 115 117
F ME 37 33 30 15 22 20 20 16 14 19 18 16 19 19 13 11 10 8
F MI 338 339 297 291 246 269 261 253 225 215 208 197 168 166 160 124 120 137
F MN 191 191 186 175 154 146 142 147 109 120 129 128 113 109 109 71 77 77
F MO 211 256 228 199 171 153 191 155 171 169 115 158 134 140 118 104 122 87
F MS 61 51 56 43 54 56 41 49 46 47 41 41 38 33 45 36 39 40
F MT 15 22 15 16 8 11 14 7 11 12 10 17 17 8 15 13 8 7
F NC 216 220 199 187 177 206 166 175 156 164 150 219 238 226 188 183 189 176
F ND 27 30 20 22 18 20 20 11 14 17 10 18 10 14 7 10 8 9
F NE 62 68 56 56 44 63 50 47 50 45 35 35 30 40 35 33 27 31
F NH 32 38 32 19 32 34 30 21 28 37 14 11 18 15 20 12 9 10
F NJ 216 223 225 177 189 174 158 177 166 162 143 171 187 160 115 131 140 138
F NM 27 21 17 15 21 24 13 14 15 15 14 43 26 24 23 28 26 33
F NV 22 33 40 29 27 38 32 24 24 40 43 50 67 50 43 52 53 62
F NY 412 408 335 315 322 335 311 288 278 281 284 299 351 336 307 271 291 323
F OH 496 477 431 398 376 372 343 307 335 311 255 263 240 198 185 154 175 171
F OK 90 107 98 87 81 81 75 93 74 69 78 67 69 80 76 69 72 55
F OR 67 84 86 55 67 81 72 66 71 55 64 62 61 52 56 61 58 68
F PA 343 402 324 281 283 296 261 247 242 214 216 203 206 148 161 176 159 145
F RI 26 25 24 21 27 26 19 14 14 14 12 15 18 16 7 6 11 11
F SC 78 87 72 58 66 60 53 58 74 71 62 79 65 76 53 79 81 56
F SD 31 33 22 27 17 33 22 26 23 21 16 14 11 19 16 17 12 16
F TN 201 203 157 158 139 129 122 149 124 123 130 127 141 96 129 131 113 116
F TX 482 497 484 447 457 440 402 465 407 435 469 656 842 714 657 724 817 797
F UT 72 83 76 73 61 74 55 64 60 51 55 69 66 56 52 55 61 42
F VA 210 227 210 176 173 152 164 187 154 173 172 221 196 190 163 152 148 152
F VT 13 15 12 15 9 11 7 11 7 6 10 0 0 0 6 0 8 5
F WA 121 122 117 116 125 111 102 95 91 110 113 109 120 94 101 136 73 100
F WI 163 183 193 150 159 161 154 135 148 125 117 112 100 102 121 83 79 82
F WV 56 67 58 56 52 62 59 50 44 33 40 38 38 35 38 21 38 28
F WY 5 8 6 10 6 8 6 8 10 8 7 12 10 12 5 8 8 5
Code
# Keep only the rows with Sex 'F' and retain State plus the year columns.
allison_f <- temp |>
  filter(Sex == 'F') |>
  select(State:`2014`)

# Total the state counts for each year, reshape to one row per year, and plot
# the yearly totals as points.
allison_f |>
  summarise(across(.cols = `1997`:`2014`,
                   .fns = sum)) |>
  # No names_to is given, so the year labels land in the default `name` column.
  pivot_longer(cols = `1997`:`2014`,
               values_to = 'Count') |>
  ggplot(mapping = aes(x = `name`,
                       y = `Count`)) +
  geom_point() +
  labs(x = 'Year',
       y = '',
       title = 'Number of AFAB kids named Allison in US by year')

GLM

Code
# Fit a simple linear regression of the yearly total (summed over states in
# allison_f) on calendar year.
allison_linear <- allison_f |>
  summarise(across(.cols = `1997`:`2014`,
                   .fns = sum)) |>
  pivot_longer(cols = `1997`:`2014`,
               values_to = 'Count') |>
  # The year labels come out of the pivot as character values in `name`;
  # convert them to numbers so lm() fits a single slope for year.
  mutate(`name` = as.numeric(`name`)) |>
  lm(`Count` ~ `name`, data = _)

# Residuals-versus-fitted plot for checking the model assumptions.
allison_linear |>
  broom::augment() |>
  ggplot(mapping = aes(y = .resid, x = .fitted)) +
  geom_point()

Regression equation: y = 209690 - 102x, where x is the year and y is the predicted number of kids named Allison in that year.

Residuals line:

Because the residual plot shows long stretches of negative residuals and the data form a time series, the independence assumption may be violated.

I believe that the name is going out of favor because, at the 5% significance level, there was enough evidence to conclude that there is a negative linear association between year and the number of kids named Allison. Additionally, 58% of the variation in the yearly number of kids named Allison is explained by the linear relationship with year, which is a good amount.

Code
# The three spellings compared in this part of the report.
# NOTE(review): `names` shadows base::names(); the identifier is kept because
# a later chunk filters with `Name %in% names`.
names <- c('Allan', 'Alan', 'Allen')

# Yearly count for each spelling among rows with Gender 'M', summed over
# states and plotted against year.
Anames |>
  filter(Name %in% names,
         Gender == 'M') |>
  group_by(Name,
           Year) |>
  # .groups = "drop" returns an ungrouped result and avoids summarise()'s
  # "has grouped output" message.
  summarise(`Count` = sum(`Count`),
            .groups = "drop") |>
  # NOTE(review): Name is not mapped to any aesthetic, so the three spellings
  # are drawn as one undifferentiated set of points — confirm this is intended.
  ggplot(mapping = aes(x = `Year`,
                       y = `Count`)) +
    geom_point()
`summarise()` has grouped output by 'Name'. You can override using the
`.groups` argument.

Code
# Counts of the three spellings for Gender 'M' in year 2000, restricted to
# California and Pennsylvania, reshaped to one row per state and one column
# per spelling.
CAvsPA <- Anames |>
  filter(Name %in% names) |>
  filter(Gender == 'M') |>
  filter(Year == '2000') |>
  filter(State %in% c('CA', 'PA')) |>
  select(-c(Gender, Year)) |>
  pivot_wider(names_from = `Name`,
              values_from = `Count`)

# Show the comparison as a styled HTML table.
kable_classic(
  kable(CAvsPA,
        format = "html",
        caption = "==>Occurance of the  3 Alan names in PA and CA<=="),
  html_font = "FiraCode Nerd Font"
)
==>Occurance of the 3 Alan names in PA and CA<==
State Alan Allen Allan
CA 579 176 131
PA 51 56 12
Code
# Pull each state's row by its State value rather than by position, so the
# result does not depend on the row order of CAvsPA.
CA <- CAvsPA |>
  filter(State == 'CA')
PA <- CAvsPA |>
  filter(State == 'PA')
# Convert a table of counts into shares of the overall total.
#
# x: a data frame whose first column is a label (here the state) and whose
#    remaining columns are numeric counts.
# Returns a data frame with the first column as given and every other column
# divided by the sum of all the count columns.
percents <- function(x) {
  label <- x[1]
  # Zero out the label column so it contributes nothing to the total and the
  # whole frame can be divided in one step.
  counts <- replace(x, 1, 0)
  shares <- counts / sum(counts)
  # Put the original label back in place of the zero placeholder.
  replace(shares, 1, label)
}

# Convert each state's counts to within-state shares and show them together.
percs <- percents(CA)
# rbind usage found: https://www.statology.org/r-append-to-data-frame/
rbind(percs, percents(PA)) |>
  # The caption names what the table shows: shares (proportions, not percent
  # values) of the three Alan spellings within each state.
  kable(format = "html",
        caption = "==>Share of each Alan spelling within CA and PA<==") |>
  kable_classic(html_font = "FiraCode Nerd Font")
==>Occurance of the name Allison by state and year in %<==
State Alan Allen Allan
CA 0.6534989 0.1986456 0.1478555
PA 0.4285714 0.4705882 0.1008403